

#' Extract hospitalization rows and derive cost, length and category fields
#'
#' Runs the extraction query on `con`, inner-joins the result to `data` on
#' `id_var`, and then adds the derived columns listed under "Value".
#'
#' The query result is expected to contain the columns used below: `id_var`,
#' `amount`, `profession`, `actual_cost`, `category`, `event_date`,
#' `event_date_end`, `date_start` and `zihui_bikur`.
#'
#' @param con DBI connection handed to `DBI::dbGetQuery()`. Required: the
#'   previous empty default (`con = ,`) was not valid R.
#' @param data Table with at least the columns `id_var`,
#'   `DMG_died_within_365d` and `prob_for_report`. Defaults to the object
#'   `new_test_cnr`, resolved lexically when the argument is omitted.
#' @param top_All_prof_10 Vector of `profession` values to keep as their own
#'   level; every other profession is recoded to `"Other"`. Required: the
#'   previous self-referential default could never be evaluated.
#' @param min_end_year,max_end_year Inclusive range of years accepted for
#'   `event_date_end`. Rows outside the range get `amount = 1` and their end
#'   date replaced by `event_date`.
#'
#' @return A data.table holding the merged columns plus `new_profession`,
#'   `max_cost_row`, `sum_cost`, `min_event_date`, `max_event_date_end`,
#'   `total_event_length_days`, `event_length_days` and `main_cat`.
get_dt_hosp <- function(con,
                        data = new_test_cnr,
                        top_All_prof_10,
                        min_end_year = 2000L,
                        max_end_year = 2020L) {

  # Fail fast, before the (potentially slow) database round trip.
  force(top_All_prof_10)
  join_cols <- c("id_var", "DMG_died_within_365d", "prob_for_report")
  absent_cols <- setdiff(join_cols, names(data))
  if (length(absent_cols) > 0L) {
    stop(
      "`data` is missing required column(s): ",
      paste(absent_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # ---- Extraction ----
  # NOTE(review): the statement text is the literal "sql_query" here,
  # presumably a placeholder for the real query - confirm.
  query_text <- "sql_query"
  query_started <- Sys.time()
  dt_hosp_raw <- data.table::as.data.table(DBI::dbGetQuery(con, query_text))
  query_finished <- Sys.time()
  print("month cost cnr table extraction took (in mins):")
  # Pin the unit so the printed value always matches the "(in mins)" label.
  print(difftime(query_finished, query_started, units = "mins"))

  # ---- Join to the per-id table ----
  # The freshly fetched table is owned by this function, so it is modified by
  # reference without taking a defensive copy first.
  dt_hosp_raw[, amount := as.integer(amount)]
  dt_hosp_raw <- merge(
    x = dt_hosp_raw,
    # Plain list subsetting works for data.frame and data.table inputs alike.
    y = data.table::as.data.table(as.list(data)[join_cols]),
    by = "id_var"
  )

  # ---- Profession grouping ----
  dt_hosp_raw[
    ,
    new_profession := factor(
      ifelse(
        profession %in% top_All_prof_10,
        as.character(profession),
        "Other"
      )
    )
  ]

  # ---- Cost and end-date clean-up ----
  dt_hosp_raw[is.na(actual_cost), actual_cost := 0]

  # Rows whose end year is outside the accepted range: reset amount to 1 and
  # fall back to the start date. which() drops NA years, matching how
  # data.table treats NA in a logical `i`.
  end_year <- year(dt_hosp_raw$event_date_end)
  bad_end_year <- which(end_year < min_end_year | end_year > max_end_year)
  dt_hosp_raw[bad_end_year, `:=`(amount = 1L, event_date_end = event_date)]

  # ---- Per-event aggregates ----
  # An event is identified by (id_var, date_start, zihui_bikur). Rows are
  # sorted so that the most expensive row of each event gets max_cost_row == 1.
  # actual_cost has no NA left at this point, so it can be used directly.
  dt_hosp_raw <- dt_hosp_raw[
    order(id_var, date_start, zihui_bikur, -actual_cost)
  ]

  # Resolve the end date once for the whole table instead of once per group;
  # the helper column is dropped again right after the aggregation.
  dt_hosp_raw[
    ,
    effective_event_end := if_else(
      is.na(event_date_end), event_date, event_date_end
    )
  ]
  dt_hosp_raw[
    ,
    `:=`(
      max_cost_row = seq_len(.N),
      sum_cost = sum(actual_cost),
      min_event_date = min(event_date),
      max_event_date_end = max(effective_event_end)
    ),
    by = .(id_var, date_start, zihui_bikur)
  ]
  dt_hosp_raw[, effective_event_end := NULL]

  dt_hosp_raw[
    ,
    total_event_length_days := as.numeric(
      difftime(max_event_date_end, min_event_date, units = "days")
    )
  ]
  # Non-positive or unknown lengths are counted as a single day.
  dt_hosp_raw[
    total_event_length_days <= 0 | is.na(total_event_length_days),
    total_event_length_days := 1
  ]

  # ---- Back-fill amount from the row's own date span ----
  # Applied only where amount is missing or non-positive and an end date
  # exists. Anything still missing or non-positive afterwards becomes 1
  # (e.g. a same-day procedure).
  # NOTE(review): the earlier comment said this should apply only to events
  # with a single row, but no such filter has ever been applied - confirm.
  dt_hosp_raw[
    (is.na(amount) | amount <= 0) & !is.na(event_date_end),
    amount := as.integer(difftime(event_date_end, event_date, units = "days"))
  ]
  dt_hosp_raw[is.na(amount) | amount <= 0, amount := 1L]

  # ---- Per-row length ----
  dt_hosp_raw[
    ,
    event_length_days := as.numeric(
      difftime(event_date_end, event_date, units = "days")
    )
  ]
  dt_hosp_raw[
    event_length_days <= 0 | is.na(event_length_days),
    event_length_days := 1
  ]

  # ---- Main category ----
  dt_hosp_raw[
    ,
    main_cat := factor(
      case_when(
        category %in% c("hospitalization_elective", "differential") ~
          "Inpatient_Planned",
        category %in% c("hospitalization_urgent") ~
          "Inpatient_Unplanned",
        TRUE ~ "missing"
      )
    )
  ]

  dt_hosp_raw
}

